#!/bin/bash
#
# Train and evaluate a Mahout Naive Bayes classifier on the "bymovie" data.
#
# Pipeline (all paths are passed to Mahout exactly as written here):
#   1. seqdirectory  bymovie                        -> bymovie-seq
#   2. seq2sparse    bymovie-seq                    -> bymovie-vectors (TF-IDF)
#   3. split         bymovie-vectors/tfidf-vectors  -> bymovie-train-vectors
#                                                      bymovie-test-vectors
#   4. trainnb       bymovie-train-vectors          -> model, labelindex
#   5. testnb        training set, then holdout set -> bymovie-testing
#
# Environment:
#   c   Optional extra flag(s) appended to the trainnb and testnb invocations
#       (presumably "-c" to select complementary Naive Bayes - confirm with
#       whoever exports it). Defaults to empty.
#
# Requires the `mahout` launcher on PATH.

# Abort on the first failing stage: every step consumes the output of the
# previous one, so continuing after a failure would train or test on stale or
# missing data.
set -euo pipefail

# Percentage of vectors that `mahout split` randomly selects for the holdout
# (test) set; the remainder becomes the training set.
readonly HOLDOUT_PCT=40

# Extra Naive Bayes flags taken from the caller's environment. `${c:-}` keeps
# this safe under `set -u` when the variable is not exported.
nb_extra_flags=${c:-}

if ! command -v mahout >/dev/null 2>&1; then
  echo "error: 'mahout' not found on PATH" >&2
  exit 1
fi

echo "Creating sequence files from bymoviegroups data"
mahout seqdirectory \
  -i bymovie \
  -o bymovie-seq -ow

echo "Converting sequence files to vectors"
mahout seq2sparse \
  -i bymovie-seq \
  -o bymovie-vectors -lnorm -nv -wt tfidf

# The message is derived from HOLDOUT_PCT so it cannot drift from the value
# actually passed to Mahout.
echo "Creating training and holdout set with a random $((100 - HOLDOUT_PCT))-${HOLDOUT_PCT} split of the generated vector dataset"
mahout split \
  -i bymovie-vectors/tfidf-vectors \
  --trainingOutput bymovie-train-vectors \
  --testOutput bymovie-test-vectors \
  --randomSelectionPct "${HOLDOUT_PCT}" --overwrite --sequenceFiles -xm sequential

# nb_extra_flags is intentionally left unquoted below so that it expands to
# zero or more separate arguments.

echo "Training Naive Bayes model"
# shellcheck disable=SC2086
mahout trainnb \
  -i bymovie-train-vectors -el \
  -o model \
  -li labelindex \
  -ow $nb_extra_flags

echo "Self testing on training set"

# shellcheck disable=SC2086
mahout testnb \
  -i bymovie-train-vectors \
  -m model \
  -l labelindex \
  -ow -o bymovie-testing $nb_extra_flags

echo "Testing on holdout set"

# NOTE(review): this run uses the same output directory as the self test and
# also passes -ow, so the holdout results appear to replace the self-test
# results on disk - confirm that is intended.
# shellcheck disable=SC2086
mahout testnb \
  -i bymovie-test-vectors \
  -m model \
  -l labelindex \
  -ow -o bymovie-testing $nb_extra_flags
